Overall

Row

Total Population

47,213,282

Average Household Size

3.9

Number of Counties

47

Row

Population per county - Top 25

Population per county - Bottom 22

Row

HH Size by County

Gender Distribution

Column

Population Distribution by County and Sex

Column

Proportion of gender - Overall

Proportion of Males in Each County

Farming

Column

Distribution of crops being grown

30 Heaviest Farming Counties

17 Lowest Farming Counties

Column

Magnitude of Farming Per Crop in Each County

Top 5 Counties Growing Each Crop

---
title: "Kenya National Census 2019"
author: "Antony Rono"
output: 
  flexdashboard::flex_dashboard:
    #theme: lumen
    orientation: rows
    vertical_layout: fill
    #runtime: shiny
    source_code: embed
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: do not echo code, and keep warnings
# and messages out of the rendered dashboard.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
#knitr::opts_chunk$set(fig.width=12, fig.height=10)

# Large scipen penalty so numbers print in fixed rather than scientific notation.
options(scipen = 99)
library(tidyverse)
library(tidytuesdayR)
library(scales)
# NOTE(review): tidyverse is already attached above; this second call is a no-op.
library(tidyverse)
library(janitor)
library(ggrepel)
library(ggthemes)
library(plotly)
library(ggpubr)
library(RColorBrewer)
library(rKenyaCensus)
library(patchwork)
library(sp)
library(sf)
library(flexdashboard)

# Default ggplot2 theme for every plot built below.
theme_set(theme_light())
```


```{r Load, include=FALSE}

#tt <- tt_load("2021-01-19")

# tt %>% 
#   
#   map(glimpse)


# Read the three raw census tables straight from the TidyTuesday repository.
gender <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/gender.csv')
crops <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/crops.csv')
households <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-19/households.csv')

# Keep the raw tables together in one list, mirroring the shape of the
# commented-out tt_load() call.
tt <- list(gender = gender, crops = crops, households = households)

# Gender table: snake_case column names, title-cased county names.
gender <- tt$gender %>%
  clean_names() %>%
  mutate(county = str_to_title(county))

# Household table: snake_case column names, then normalise the county label.
# The space is inserted at the first lower-to-upper case boundary *before*
# title-casing (a value such as "TanaRiver" becomes "Tana River"); surrounding
# whitespace is trimmed last.
household <- tt$households %>%
  clean_names() %>%
  mutate(
    county = county %>%
      str_replace("([a-z])([A-Z])", "\\1 \\2") %>%
      str_to_title() %>%
      str_trim()
  )

# Crops table: the `sub_county` column is renamed to `county` so it shares a
# column name with the other tables, then title-cased.
crops <- tt$crops %>%
  clean_names() %>%
  rename(county = sub_county) %>%
  mutate(county = str_to_title(county))
```


Overall {data-icon="fa-globe"}
=======================================================================

Row {data-width=80}
-----------------------------------------------------------------------

### Total Population
```{r Overall population}

# Headline population figure for the value box.
# The national total is looked up by its "Kenya" label and column name rather
# than by row/column position, so the box stays correct if the table is
# reordered or gains columns. comma() formats it with thousands separators.
population <- household %>%
  filter(county == "Kenya") %>%
  pull(population) %>%
  comma()

valueBox(value = population, caption = "Population", icon = "fa-user-plus", color = "green")

```

### Average Household Size
```{r HH Size}

# National average household size for the value box.
# Pulled by name from the "Kenya" row as a plain number, instead of taking
# "first row, last column" as a 1x1 tibble.
hh_size <- household %>%
  filter(county == "Kenya") %>%
  pull(average_household_size)

valueBox(value = hh_size, caption = "Average Household Size", icon = "fa-house-user", color = "orange")


```

### Number of Counties
```{r No. of counties}

# Number of counties = rows of `household` other than the national "Kenya"
# summary row (excluded by label rather than by subtracting a magic 1).
n_counties <- household %>%
  filter(county != "Kenya") %>%
  nrow()

valueBox(value = n_counties, caption = "Number of counties", icon = "fa-map-marker-alt", color = "coral")

```

Row {.tabset .tabset-fade}
-----------------------------------------------------------------------

### Population per county - Top 25

```{r Population per county - Top 25}


# Bar chart of population per county.
#
# Args:
#   data: data frame with a `county` column and a numeric `population` column.
#         A row labelled "Kenya" is dropped before plotting.
#
# Returns:
#   A ggplot object with one bar per county, ordered by decreasing population,
#   drawn with flipped coordinates and without a legend.
plot_by_county <- function(data) {
  data %>%
    filter(county != "Kenya") %>%
    # Largest county first.
    mutate(county = fct_reorder(county, population, sum, .desc = TRUE)) %>%
    ggplot(aes(population, county, fill = county)) +
    geom_col() +
    # `labels` spelled out in full (the original `label` only worked through
    # partial argument matching).
    scale_x_continuous(expand = c(0, 0), labels = comma) +
    coord_flip() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 60, hjust = 1)
    )
}


# Top 25 counties by population.
# The national "Kenya" row is removed first so that `n` is the number of
# counties actually shown, rather than asking for 26 rows and relying on the
# national total being one of them.
p <- household %>%
  filter(county != "Kenya") %>%
  slice_max(population, n = 25) %>%
  plot_by_county()

# Interactive version; hover shows only the x and y values.
ggplotly(p, tooltip = c("x", "y"))
```

### Population per county - Bottom 22

```{r Population per county - Bottom 22}

# The 22 rows of `household` with the smallest population, drawn with the
# shared county bar-chart helper.
p <- plot_by_county(slice_min(household, population, n = 22))

# Interactive version; hover shows only the x and y values.
ggplotly(p, tooltip = c("x", "y"))

```


Row
-----------------------------------------------------------------------

### HH Size by County

```{r Visualize Households, dpi = 200}

# Scatter of average household size against population, one point per county
# (the national "Kenya" row is excluded).
p <- household %>%
  filter(county != "Kenya") %>%
  ggplot(aes(population, average_household_size, text = county)) +
  # geom_point() rather than geom_jitter(): jitter adds random noise, which
  # moves each marker away from its true value and its label, and makes the
  # figure differ between renders.
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma)

# Interactive version: hover shows x, y and the county name; labels are placed
# to the right of their markers.
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "right")

  
 # ggtitle("HH Size by County")
```


Gender Distribution {data-orientation=columns data-icon="fa-venus-mars"}
=======================================================================

Column {data-width=500}
-----------------------------------------------------------------------

### Population Distribution by County and Sex


```{r Visualize Gender by sex and county}

# Stacked bars of population per county, split by sex.
p <- gender %>%
  # Drop the national summary row.
  filter(county != "Total") %>%
  # One row per county x sex.
  pivot_longer(
    cols = c(male, female, intersex),
    names_to = "gender",
    values_to = "Population"
  ) %>%
  mutate(county = str_to_title(county)) %>%
  mutate(
    # Counties ordered by their total population.
    county = fct_reorder(county, total, sum),
    # Sexes ordered from largest to smallest overall population. The argument
    # is `.desc`; the previous `desc = TRUE` was forwarded to sum() and added 1
    # to each total instead of reversing the order.
    gender = fct_reorder(gender, Population, sum, .desc = TRUE)
  ) %>%
  ggplot(aes(Population, county, fill = gender)) +
  scale_x_continuous(labels = comma, expand = c(0, 0)) +
  geom_col()
  # labs(title = "Population Distribution by Counties and Sex",
  #      subtitle = "The proportion of sex seems to be equal in all counties")


ggplotly(p)


```

Column {data-width=500}
-----------------------------------------------------------------------

### Proportion of gender - Overall
```{r Overall gender proportion}

 # Donut chart of the national population split by sex, built from the
 # "Total" row of the gender table. Plotly derives the shares from the raw
 # counts, so no percentage column is needed (the previous `pct` column was
 # computed and then dropped by select()).
 gender %>%
   filter(county == "Total") %>%
   pivot_longer(
     cols = c(male, female, intersex),
     names_to = "gender",
     values_to = "Population"
   ) %>%
   select(gender, Population) %>%
   plot_ly(labels = ~gender, values = ~ Population) %>%
   # hole = 0.6 turns the pie into a donut.
   add_pie(hole = 0.6)

   
# p <- gender %>% 
#   
#   filter(county == "Total") %>% 
#   
#   pivot_longer( cols = c(male, female, intersex),
#                
#                names_to = "gender", values_to = "Population") %>% 
#   
#     mutate(pct = round(Population/total,3),
#            
#            ymax = cumsum(pct),
#            
#            ymin = lag(ymax, default = 0),
#            
#            labelPosition = rowSums(across(c(ymax, ymin)))/2,
#            
#            label = paste0(gender,":\n", percent(pct))) %>% 
#            
#            #label =percent(pct) ) %>% 
#   
#   ggplot(aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=gender)) +
#   
#   geom_rect() +
#   
#   geom_text( x=2, aes(y=labelPosition, label=label, color=gender), size=4) + # x here controls label position (inner / outer)
#   
#   scale_fill_brewer(palette="BuPu") +
#   
#  # scale_color_brewer(palette=3) +
#   
#   coord_polar(theta="y") +
#   
#   xlim(c(-1, 4)) +
#   
#   theme_void() 
#   

  

```



### Proportion of Males in Each County

```{r Proportion of males by County}

# Share of males in each county against the county's total population, with a
# red reference line at 50%.
p <- gender %>%
  filter(county != "Total") %>%
  # Only the male share is plotted (the unused `pct_female` column was dropped).
  mutate(pct_male = male / total) %>%
  ggplot(aes(total, pct_male, text = county)) +
  # geom_point() rather than geom_jitter(): jitter adds random noise, which
  # moves each marker away from its true value and its label, and makes the
  # figure differ between renders.
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  geom_hline(yintercept = 0.5, color = "red") +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

# Interactive version: hover shows x, y and the county name; labels are placed
# to the right of their markers.
ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "right")
```

Farming {data-orientation=columns data-icon="fa-seedling"}
=======================================================================

Column {.tabset}
-----------------------------------------------------------------------


```{r Crops Clean}
## Crops as rows
# Long-format crop table: one row per county x crop with the number of
# households growing that crop. `farming` and `county` stay as id columns.
crops_clean <- crops %>%
  # Every column except the county label is converted to numeric
  # (across() replaces the superseded mutate_at()).
  mutate(across(-county, as.numeric)) %>%
  pivot_longer(
    cols = -c(farming, county),
    names_to = "crop",
    values_to = "households"
  ) %>%
  # Drop county/crop pairs with no value, and the national summary row.
  filter(!is.na(households), county != "Kenya") %>%
  mutate(
    county = str_to_title(county),
    # Both factors are ordered by total households, ascending.
    county = fct_reorder(county, households, sum),
    # `.desc` is the real argument name; the previous `desc = FALSE` was
    # forwarded to sum() (adding 0), so the resulting order is unchanged.
    crop = fct_reorder(crop, households, sum, .desc = FALSE)
  )


```



### Distribution of crops being grown
```{r overall crops breakdown}

# Total number of households growing each crop, summed over all counties.
crop_totals <- summarise(group_by(crops_clean, crop), households = sum(households))

# One bar per crop; bar colour repeats the crop, so the legend is hidden.
p <- ggplot(crop_totals, aes(households, crop, fill = crop)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "none")

ggplotly(p)
```

### 30 Heaviest Farming Counties

```{r Top 30 farming Counties}

# The 30 counties with the highest share of households doing farming, plotted
# against the county's number of households.
p <- crops_clean %>%
  left_join(household, by = "county") %>%
  mutate(percent_doing_farming = farming / number_of_households) %>%
  # crops_clean has one row per county x crop; keep one row per county.
  distinct(county, number_of_households, percent_doing_farming) %>%
  slice_max(order_by = percent_doing_farming, n = 30) %>%
  # `text = county` feeds the "text" entry of the plotly tooltip requested
  # below, as the other county scatter plots in this dashboard do; without it
  # the hover box shows no county name.
  ggplot(aes(number_of_households, percent_doing_farming, text = county)) +
  geom_point() +
  geom_text(aes(label = county)) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top-right")
```


### 17 Lowest Farming Counties

```{r 17 lowest farming Counties}

# The 17 counties with the lowest share of households doing farming, plotted
# against the county's number of households.
p <- crops_clean %>%
  left_join(household, by = "county") %>%
  mutate(percent_doing_farming = farming / number_of_households) %>%
  # crops_clean has one row per county x crop; keep one row per county.
  distinct(county, number_of_households, percent_doing_farming) %>%
  slice_min(order_by = percent_doing_farming, n = 17) %>%
  # `text = county` feeds the "text" entry of the plotly tooltip requested
  # below, as the other county scatter plots in this dashboard do; without it
  # the hover box shows no county name.
  ggplot(aes(number_of_households, percent_doing_farming, text = county)) +
  geom_point() +
  geom_text(aes(label = county)) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = percent)

ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top-right")

```


Column {.tabset}
-----------------------------------------------------------------------


### Magnitude of Farming Per Crop in Each County

```{r Visualize Crop Heatmap}

## Heatmap of crops by county and HHs
# complete() adds every missing crop x county pair with 0 households, so the
# tile grid has no gaps.
p <- ggplot(
  complete(crops_clean, crop, county, fill = list(households = 0)),
  aes(crop, county, fill = households)
) +
  geom_tile() +
  # Colour runs from "#e6550d" at the low end to "#fee6ce" at the high end.
  scale_fill_gradient(
    low = "#e6550d",
    high = "#fee6ce",
    guide = "colorbar"
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "Crop", y = "", fill = "No. of HHs growing this crop") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

ggplotly(p)

```


### Top 5 Counties Growing Each Crop

```{r Visualize Top 5 Counties per Crop}

# For every crop, the five counties where the largest share of households
# grows it. The share is households growing the crop divided by the county's
# number of households.
top_counties_per_crop <- crops_clean %>%
  left_join(household, by = "county") %>%
  mutate(percent_doing_farming = households / number_of_households) %>%
  group_by(crop) %>%
  slice_max(order_by = percent_doing_farming, n = 5)

# One panel per crop with its own y range; y-axis title, text and ticks are
# removed from the panels.
p <- ggplot(
  top_counties_per_crop,
  aes(number_of_households, percent_doing_farming, text = county)
) +
  geom_point() +
  geom_text(aes(label = county), size = 2.5) +
  facet_wrap(~crop, scales = "free_y", strip.position = "right") +
  # x axis shown in thousands.
  scale_x_continuous(labels = unit_format(unit = "K", scale = 1e-3)) +
  scale_y_continuous(labels = percent) +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 1)
  )

ggplotly(p, tooltip = c("x", "y", "text")) %>%
  style(textposition = "top-right")
```